library(rworldmap)
library(plyr)
library(dplyr)
library(purrr)
library(magrittr)
library(ggplot2)
library(GGally)
library(devtools)
library(viridis)
load_all()
## Loading sos
data(country_data)
country_data <- country_data[!is.na(country_data$systems_per_cap), ]
country_data <- na.omit(country_data)
data_map <- joinCountryData2Map(dF = country_data, joinCode = "ISO3", nameJoinColumn = "iso3", verbose = FALSE)
## 163 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 82 codes from the map weren't represented in your data
I merged various country-level datasets to get a data frame with the following columns.
iso3)[A map doesn’t make sense for this variable.]
daly_all)spplot(data_map, zcol = "daly_all",
col.regions = viridis(100),
main = "Dalys Lost To All Causes")
daly_comm)spplot(data_map, zcol = "daly_comm",
col.regions = viridis(100),
main = "Dalys Lost To Communicable Diseases")
death_all)spplot(data_map, zcol = "death_all",
col.regions = viridis(100),
main = "Deaths From All Causes")
death_comm)spplot(data_map, zcol = "death_comm",
col.regions = viridis(100),
main = "Deaths From Communicable Diseases")
diseases)spplot(data_map, zcol = "diseases",
col.regions = viridis(100),
main = "Number Of Different Diseases Recorded In That Country, Per Gideon")
gdp_per_cap)spplot(data_map, zcol = "gdp_per_cap",
col.regions = viridis(100),
main = "Gdp Per Capita")
health_exp_tot_usd)spplot(data_map, zcol = "health_exp_tot_usd",
col.regions = viridis(100),
main = "Total Health Expenditure Per Capita, In Usd")
health_exp_govt_usd)spplot(data_map, zcol = "health_exp_govt_usd",
col.regions = viridis(100),
main = "Government Health Expenditure Per Capita, In Usd")
population)spplot(data_map, zcol = "population",
col.regions = viridis(100),
main = "Population")
systems)spplot(data_map, zcol = "systems",
col.regions = viridis(100),
main = "Number Of Surveillance Systems")
systems_per_cap)spplot(data_map, zcol = "systems_per_cap",
col.regions = viridis(100),
main = "Surveillance Systems Per Capita")
ggpairs(country_data[-1])
qplot(daly_all, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ daly_all, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ daly_all, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -7.298e-05 -1.306e-05 -8.070e-06 1.300e-06 3.493e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.503e-06 1.694e-06 5.019 1.37e-06 ***
## daly_all -1.070e-10 4.685e-11 -2.284 0.0237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.204e-05 on 161 degrees of freedom
## Multiple R-squared: 0.03138, Adjusted R-squared: 0.02536
## F-statistic: 5.216 on 1 and 161 DF, p-value: 0.02369
There’s a negative correlation between the DALY rate and number of systems per capita. However, it isn’t very strong.
qplot(daly_comm, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ daly_comm, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ daly_comm, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -7.122e-05 -1.389e-05 -9.470e-06 1.400e-07 3.517e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.138e-06 9.910e-07 6.194 4.71e-09 ***
## daly_comm -1.279e-10 6.533e-11 -1.958 0.0519 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.222e-05 on 161 degrees of freedom
## Multiple R-squared: 0.02326, Adjusted R-squared: 0.01719
## F-statistic: 3.834 on 1 and 161 DF, p-value: 0.05194
For the communicable diseases DALY rate, the same correlation is true, but it’s less strong (“marginally significant”).
qplot(death_all, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ death_all, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ death_all, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -7.391e-05 -1.280e-05 -7.280e-06 1.030e-06 3.485e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.751e-06 1.742e-06 5.024 1.34e-06 ***
## death_all -5.354e-09 2.265e-09 -2.364 0.0193 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.2e-05 on 161 degrees of freedom
## Multiple R-squared: 0.03354, Adjusted R-squared: 0.02753
## F-statistic: 5.587 on 1 and 161 DF, p-value: 0.01929
Again, the same correlation is true for death from all causes. It’s a bit stronger here.
qplot(death_comm, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ death_comm, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ death_comm, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -7.051e-05 -1.398e-05 -9.300e-06 -3.000e-07 3.520e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.097e-06 9.833e-07 6.201 4.54e-09 ***
## death_comm -6.975e-09 3.599e-09 -1.938 0.0544 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.223e-05 on 161 degrees of freedom
## Multiple R-squared: 0.02279, Adjusted R-squared: 0.01672
## F-statistic: 3.755 on 1 and 161 DF, p-value: 0.05441
Death from communicable diseases looks similar to DALYs from communicable diseases.
ggpairs(country_data[, 2:5])
# If it surprises anybody, these are all very collinear. I'm going to pick only one of them to continue with.
country_data %<>%
select(-daly_comm, -death_all, -death_comm)
qplot(diseases, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ diseases, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ diseases, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -6.269e-05 -1.682e-05 -9.330e-06 5.800e-07 3.215e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.101e-05 1.147e-05 6.192 4.76e-09 ***
## diseases -3.132e-07 5.442e-08 -5.756 4.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.89e-05 on 161 degrees of freedom
## Multiple R-squared: 0.1707, Adjusted R-squared: 0.1655
## F-statistic: 33.13 on 1 and 161 DF, p-value: 4.244e-08
There is a strong negative correlation between number of diseases observed per country in GIDEON and number of surveillance systems in a country. However, the causality here could be different–it could be that we don’t observe diseases because there aren’t many surveillance systems covering the population.
qplot(gdp_per_cap, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ gdp_per_cap, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ gdp_per_cap, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -9.378e-05 -8.890e-06 -5.900e-06 -1.270e-06 3.581e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.558e-06 1.267e-06 2.019 0.04512 *
## gdp_per_cap 9.696e-11 3.519e-11 2.755 0.00654 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.175e-05 on 161 degrees of freedom
## Multiple R-squared: 0.04503, Adjusted R-squared: 0.0391
## F-statistic: 7.592 on 1 and 161 DF, p-value: 0.006539
Positive correlation, significant. The more GDP per capita you have, the higher your number of systems per capita. It’s significant all the time, moreso if you weight for population or nothing.
qplot(health_exp_tot_usd, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ health_exp_tot_usd, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ health_exp_tot_usd, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -7.818e-05 -1.280e-05 -1.036e-05 -5.560e-06 3.569e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.623e-06 1.184e-06 3.905 0.000139 ***
## health_exp_tot_usd 2.007e-10 3.036e-10 0.661 0.509478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.266e-05 on 161 degrees of freedom
## Multiple R-squared: 0.002707, Adjusted R-squared: -0.003487
## F-statistic: 0.4371 on 1 and 161 DF, p-value: 0.5095
No significant correlation.
qplot(health_exp_govt_usd, systems_per_cap, data = country_data) + geom_smooth(method = "lm")
m <- lm(systems_per_cap ~ health_exp_govt_usd, data = country_data, weights = systems)
summary(m)
##
## Call:
## lm(formula = systems_per_cap ~ health_exp_govt_usd, data = country_data,
## weights = systems)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -8.141e-05 -1.144e-05 -9.000e-06 -4.270e-06 3.579e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.026e-06 1.195e-06 3.369 0.000944 ***
## health_exp_govt_usd 6.215e-10 4.553e-10 1.365 0.174103
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.247e-05 on 161 degrees of freedom
## Multiple R-squared: 0.01144, Adjusted R-squared: 0.005303
## F-statistic: 1.864 on 1 and 161 DF, p-value: 0.1741
No significant correlation.
qplot(health_exp_tot_usd, health_exp_govt_usd, data = country_data)
# These are really collinear. I'm removing government healthcare spending.
country_data %<>%
select(-health_exp_govt_usd)
Weirdly, even though GDP per capita and healthcare expenditure per capita are similarly distributed (\(R^2 = 0.82\)), the p-value for the univariate model including GDP is 0.00687, whereas for healthcare expenditure per capita it’s 0.50757.
I removed all but one from the groups of collinear variables (DALY indicators, healthcare expenditure indicators).
na.omit() on the data frame. China must be missing on one of the variables in the data frame, so I’m going to look it up and fix that data merging problem, because it’s an important country to include.log(systems_per_capita)?gdp_per_cap, daly_all, diseases, health_exp_tot_usd.weights = systemscountry_data$systems_per_1000000 <- country_data$systems_per_cap * 1000000
m1 <- glm(systems_per_1000000 ~ gdp_per_cap, data = country_data, weights = systems)
summary(m1)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -93.78 -8.89 -5.90 -1.27 358.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.558e+00 1.267e+00 2.019 0.04512 *
## gdp_per_cap 9.696e-05 3.519e-05 2.755 0.00654 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1742.751)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 280583 on 161 degrees of freedom
## AIC: 1300.1
##
## Number of Fisher Scoring iterations: 2
# Significant, of course.
m2 <- glm(systems_per_1000000 ~ diseases, data = country_data, weights = systems)
summary(m2)
##
## Call:
## glm(formula = systems_per_1000000 ~ diseases, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -62.69 -16.82 -9.33 0.58 321.51
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.00704 11.46752 6.192 4.76e-09 ***
## diseases -0.31324 0.05442 -5.756 4.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1513.491)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 243672 on 161 degrees of freedom
## AIC: 1277.1
##
## Number of Fisher Scoring iterations: 2
# Significant, positive.
m3 <- glm(systems_per_1000000 ~ daly_all, data = country_data, weights = systems)
summary(m3)
##
## Call:
## glm(formula = systems_per_1000000 ~ daly_all, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -72.98 -13.06 -8.07 1.30 349.31
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.503e+00 1.694e+00 5.019 1.37e-06 ***
## daly_all -1.070e-04 4.685e-05 -2.284 0.0237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1767.665)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 284594 on 161 degrees of freedom
## AIC: 1302.4
##
## Number of Fisher Scoring iterations: 2
# Significant, of course.
m4 <- glm(systems_per_1000000 ~ health_exp_tot_usd, data = country_data, weights = systems)
summary(m4)
##
## Call:
## glm(formula = systems_per_1000000 ~ health_exp_tot_usd, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -78.18 -12.80 -10.36 -5.56 356.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6228064 1.1839116 3.905 0.000139 ***
## health_exp_tot_usd 0.0002007 0.0003036 0.661 0.509478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1819.988)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 293018 on 161 degrees of freedom
## AIC: 1307.2
##
## Number of Fisher Scoring iterations: 2
# Not significant
m5 <- glm(systems_per_1000000 ~ gdp_per_cap + diseases , data = country_data, weights = systems)
summary(m5)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap + diseases, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -71.62 -13.06 -4.83 4.26 324.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.551e+01 1.162e+01 5.639 7.58e-08 ***
## gdp_per_cap 7.111e-05 3.277e-05 2.170 0.0315 *
## diseases -2.961e-01 5.438e-02 -5.446 1.91e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1479.411)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 236706 on 160 degrees of freedom
## AIC: 1274.4
##
## Number of Fisher Scoring iterations: 2
# All are still significant.
m6 <- glm(systems_per_1000000 ~ gdp_per_cap * diseases , data = country_data, weights = systems)
summary(m6)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap * diseases, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -71.97 -11.65 -5.33 1.65 328.68
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.418e+01 1.866e+01 2.368 0.0191 *
## gdp_per_cap 7.259e-04 4.503e-04 1.612 0.1090
## diseases -1.946e-01 8.824e-02 -2.206 0.0288 *
## gdp_per_cap:diseases -3.145e-06 2.157e-06 -1.458 0.1468
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1469.077)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 233583 on 159 degrees of freedom
## AIC: 1274.2
##
## Number of Fisher Scoring iterations: 2
# No, this messes things up. We remove the interaction term.
m7 <- glm(systems_per_1000000 ~ gdp_per_cap + diseases + daly_all , data = country_data, weights = systems)
summary(m7)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap + diseases +
## daly_all, data = country_data, weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -71.31 -13.49 -5.17 3.65 325.54
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.557e+01 1.165e+01 5.627 8.10e-08 ***
## gdp_per_cap 7.773e-05 4.034e-05 1.927 0.0558 .
## diseases -2.996e-01 5.586e-02 -5.363 2.84e-07 ***
## daly_all 1.546e-05 5.462e-05 0.283 0.7775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1487.965)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 236586 on 159 degrees of freedom
## AIC: 1276.3
##
## Number of Fisher Scoring iterations: 2
# No, we won't keep this. It isn't significant, and it bumps gdp_per_cap up above 0.05.
m8 <- glm(systems_per_1000000 ~ gdp_per_cap + diseases + health_exp_tot_usd , data = country_data, weights = systems)
summary(m8)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap + diseases +
## health_exp_tot_usd, data = country_data, weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -73.85 -12.52 -4.68 3.21 328.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.512e+01 1.485e+01 3.711 0.000285 ***
## gdp_per_cap 1.663e-04 9.098e-05 1.827 0.069517 .
## diseases -2.480e-01 6.928e-02 -3.579 0.000458 ***
## health_exp_tot_usd -8.617e-04 7.687e-04 -1.121 0.263992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1477.042)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 234850 on 159 degrees of freedom
## AIC: 1275.1
##
## Number of Fisher Scoring iterations: 2
# And, same.
# So for this section, our winning model is:
m9 <- glm(systems_per_1000000 ~ gdp_per_cap + diseases , data = country_data, weights = systems)
summary(m9)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap + diseases, data = country_data,
## weights = systems)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -71.62 -13.06 -4.83 4.26 324.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.551e+01 1.162e+01 5.639 7.58e-08 ***
## gdp_per_cap 7.111e-05 3.277e-05 2.170 0.0315 *
## diseases -2.961e-01 5.438e-02 -5.446 1.91e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1479.411)
##
## Null deviance: 293814 on 162 degrees of freedom
## Residual deviance: 236706 on 160 degrees of freedom
## AIC: 1274.4
##
## Number of Fisher Scoring iterations: 2
# All are still significant.
models <- list(m1, m2, m3, m4, m5, m6, m7, m8, m9)
print_summary <- function(x) {
cat("\n")
print(x$formula)
# cat("\n")
print(knitr::kable(summary(x)$coef))
cat("\n")
}
models %>%
map(print_summary)
systems_per_1000000 ~ gdp_per_cap
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 2.558310 | 1.2669645 | 2.019243 | 0.0451210 |
| gdp_per_cap | 0.000097 | 0.0000352 | 2.755332 | 0.0065391 |
systems_per_1000000 ~ diseases
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 71.0070414 | 11.4675160 | 6.192016 | 0 |
| diseases | -0.3132355 | 0.0544204 | -5.755849 | 0 |
systems_per_1000000 ~ daly_all
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 8.502787 | 1.6942875 | 5.018503 | 0.0000014 |
| daly_all | -0.000107 | 0.0000469 | -2.283780 | 0.0236915 |
systems_per_1000000 ~ health_exp_tot_usd
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 4.6228064 | 1.1839116 | 3.9046887 | 0.0001385 |
| health_exp_tot_usd | 0.0002007 | 0.0003036 | 0.6611229 | 0.5094785 |
systems_per_1000000 ~ gdp_per_cap + diseases
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 65.5068853 | 11.6175397 | 5.638619 | 0.0000001 |
| gdp_per_cap | 0.0000711 | 0.0000328 | 2.169986 | 0.0314829 |
| diseases | -0.2961403 | 0.0543779 | -5.445970 | 0.0000002 |
systems_per_1000000 ~ gdp_per_cap * diseases
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 44.1765229 | 18.6570563 | 2.367818 | 0.0190957 |
| gdp_per_cap | 0.0007259 | 0.0004503 | 1.611959 | 0.1089540 |
| diseases | -0.1946181 | 0.0882350 | -2.205678 | 0.0288412 |
| gdp_per_cap:diseases | -0.0000031 | 0.0000022 | -1.457905 | 0.1468391 |
systems_per_1000000 ~ gdp_per_cap + diseases + daly_all
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 65.5655515 | 11.6529216 | 5.6265333 | 0.0000001 |
| gdp_per_cap | 0.0000777 | 0.0000403 | 1.9269623 | 0.0557675 |
| diseases | -0.2995635 | 0.0558591 | -5.3628393 | 0.0000003 |
| daly_all | 0.0000155 | 0.0000546 | 0.2831212 | 0.7774522 |
systems_per_1000000 ~ gdp_per_cap + diseases + health_exp_tot_usd
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 55.1216537 | 14.8520199 | 3.711391 | 0.0002846 |
| gdp_per_cap | 0.0001663 | 0.0000910 | 1.827381 | 0.0695173 |
| diseases | -0.2479501 | 0.0692845 | -3.578726 | 0.0004580 |
| health_exp_tot_usd | -0.0008617 | 0.0007687 | -1.120968 | 0.2639917 |
systems_per_1000000 ~ gdp_per_cap + diseases
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 65.5068853 | 11.6175397 | 5.638619 | 0.0000001 |
| gdp_per_cap | 0.0000711 | 0.0000328 | 2.169986 | 0.0314829 |
| diseases | -0.2961403 | 0.0543779 | -5.445970 | 0.0000002 |
[[1]] NULL
[[2]] NULL
[[3]] NULL
[[4]] NULL
[[5]] NULL
[[6]] NULL
[[7]] NULL
[[8]] NULL
[[9]] NULL
#'
weights = systemsm1 <- glm(systems_per_1000000 ~ gdp_per_cap, data = country_data)
summary(m1)
##
## Call:
## glm(formula = systems_per_1000000 ~ gdp_per_cap, data = country_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.428 -2.603 -1.758 -0.172 65.160
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.054e+00 8.019e-01 2.562 0.0113 *
## gdp_per_cap 1.281e-04 3.137e-05 4.083 7e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 69.44316)
##
## Null deviance: 12338 on 162 degrees of freedom
## Residual deviance: 11180 on 161 degrees of freedom
## AIC: 1157.8
##
## Number of Fisher Scoring iterations: 2
# Significant, of course.
m2 <- glm(systems_per_1000000 ~ diseases, data = country_data)
summary(m2)
##
## Call:
## glm(formula = systems_per_1000000 ~ diseases, data = country_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.613 -3.965 -1.698 1.055 60.644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.26451 9.77900 6.265 3.27e-09 ***
## diseases -0.27220 0.04635 -5.872 2.39e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 63.11445)
##
## Null deviance: 12338 on 162 degrees of freedom
## Residual deviance: 10161 on 161 degrees of freedom
## AIC: 1142.2
##
## Number of Fisher Scoring iterations: 2
# Not significant
m3 <- glm(systems_per_1000000 ~ daly_all, data = country_data)
summary(m3)
##
## Call:
## glm(formula = systems_per_1000000 ~ daly_all, data = country_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.051 -3.956 -2.409 0.350 64.364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.539e+00 1.417e+00 5.320 3.43e-07 ***
## daly_all -8.777e-05 3.061e-05 -2.868 0.00469 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 72.90864)
##
## Null deviance: 12338 on 162 degrees of freedom
## Residual deviance: 11738 on 161 degrees of freedom
## AIC: 1165.7
##
## Number of Fisher Scoring iterations: 2
# Significant, positive
m4 <- glm(systems_per_1000000 ~ health_exp_tot_usd, data = country_data)
summary(m4)
##
## Call:
## glm(formula = systems_per_1000000 ~ health_exp_tot_usd, data = country_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.943 -2.797 -2.337 -0.438 65.331
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.7246064 0.7771305 3.506 0.00059 ***
## health_exp_tot_usd 0.0010679 0.0003468 3.079 0.00244 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 72.36978)
##
## Null deviance: 12338 on 162 degrees of freedom
## Residual deviance: 11652 on 161 degrees of freedom
## AIC: 1164.5
##
## Number of Fisher Scoring iterations: 2
# Significant, positive
m5 <- lm(systems_per_1000000 ~ gdp_per_cap + diseases , data = country_data)
summary(m5)
##
## Call:
## lm(formula = systems_per_1000000 ~ gdp_per_cap + diseases, data = country_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.811 -3.649 -1.636 1.205 60.806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.164e+01 1.034e+01 4.994 1.53e-06 ***
## gdp_per_cap 7.895e-05 3.114e-05 2.535 0.0122 *
## diseases -2.320e-01 4.826e-02 -4.808 3.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.814 on 160 degrees of freedom
## Multiple R-squared: 0.2082, Adjusted R-squared: 0.1983
## F-statistic: 21.04 on 2 and 160 DF, p-value: 7.743e-09
# All are still significant.
m6 <- lm(systems_per_1000000 ~ gdp_per_cap * diseases , data = country_data)
summary(m6)
##
## Call:
## lm(formula = systems_per_1000000 ~ gdp_per_cap * diseases, data = country_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.663 -3.193 -1.381 1.027 60.290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.209e+01 1.202e+01 2.670 0.00838 **
## gdp_per_cap 1.650e-03 5.256e-04 3.139 0.00202 **
## diseases -1.380e-01 5.663e-02 -2.436 0.01594 *
## gdp_per_cap:diseases -7.771e-06 2.596e-06 -2.994 0.00320 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.626 on 159 degrees of freedom
## Multiple R-squared: 0.2505, Adjusted R-squared: 0.2363
## F-statistic: 17.71 on 3 and 159 DF, p-value: 5.734e-10
# All are significant here, and the R^2 is better. But conceptually... how should I use weights?
m7 <- lm(systems_per_1000000 ~ gdp_per_cap + diseases + daly_all , data = country_data)
summary(m7)
##
## Call:
## lm(formula = systems_per_1000000 ~ gdp_per_cap + diseases + daly_all,
## data = country_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.844 -3.650 -1.658 1.209 60.820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.166e+01 1.039e+01 4.973 1.69e-06 ***
## gdp_per_cap 7.957e-05 3.541e-05 2.247 0.026 *
## diseases -2.324e-01 4.952e-02 -4.694 5.76e-06 ***
## daly_all 1.263e-06 3.389e-05 0.037 0.970
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.838 on 159 degrees of freedom
## Multiple R-squared: 0.2082, Adjusted R-squared: 0.1933
## F-statistic: 13.94 on 3 and 159 DF, p-value: 4.104e-08
# DALYs aren't significant here.
m8 <- lm(systems_per_1000000 ~ gdp_per_cap + diseases + health_exp_tot_usd , data = country_data)
summary(m8)
##
## Call:
## lm(formula = systems_per_1000000 ~ gdp_per_cap + diseases + health_exp_tot_usd,
## data = country_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.770 -3.560 -1.594 1.330 60.937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.069e+01 1.057e+01 4.796 3.70e-06 ***
## gdp_per_cap 1.089e-04 7.255e-05 1.500 0.135
## diseases -2.278e-01 4.928e-02 -4.622 7.83e-06 ***
## health_exp_tot_usd -3.473e-04 7.606e-04 -0.457 0.649
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.833 on 159 degrees of freedom
## Multiple R-squared: 0.2092, Adjusted R-squared: 0.1943
## F-statistic: 14.02 on 3 and 159 DF, p-value: 3.709e-08
# Health expenditure isn't significant and it fucks with GDP's significance.
# So, without `weights = systems`, our final model is:
m9 <- lm(systems_per_1000000 ~ gdp_per_cap * diseases , data = country_data)
summary(m9)
##
## Call:
## lm(formula = systems_per_1000000 ~ gdp_per_cap * diseases, data = country_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.663 -3.193 -1.381 1.027 60.290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.209e+01 1.202e+01 2.670 0.00838 **
## gdp_per_cap 1.650e-03 5.256e-04 3.139 0.00202 **
## diseases -1.380e-01 5.663e-02 -2.436 0.01594 *
## gdp_per_cap:diseases -7.771e-06 2.596e-06 -2.994 0.00320 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.626 on 159 degrees of freedom
## Multiple R-squared: 0.2505, Adjusted R-squared: 0.2363
## F-statistic: 17.71 on 3 and 159 DF, p-value: 5.734e-10
models <- list(m1, m2, m3, m4, m5, m6, m7, m8, m9)
models %>%
map(print_summary)
systems_per_1000000 ~ gdp_per_cap
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 2.0541806 | 0.8019172 | 2.561587 | 0.0113373 |
| gdp_per_cap | 0.0001281 | 0.0000314 | 4.082633 | 0.0000700 |
systems_per_1000000 ~ diseases
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 61.2645121 | 9.7790040 | 6.264903 | 0 |
| diseases | -0.2721971 | 0.0463531 | -5.872245 | 0 |
systems_per_1000000 ~ daly_all
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 7.5389563 | 1.4171512 | 5.319797 | 0.0000003 |
| daly_all | -0.0000878 | 0.0000306 | -2.867577 | 0.0046903 |
systems_per_1000000 ~ health_exp_tot_usd
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 2.7246064 | 0.7771305 | 3.505983 | 0.0005895 |
| health_exp_tot_usd | 0.0010679 | 0.0003468 | 3.079452 | 0.0024390 |
NULL
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 51.638516 | 10.3405492 | 4.993788 | 0.0000015 |
| gdp_per_cap | 0.000079 | 0.0000311 | 2.535215 | 0.0121972 |
| diseases | -0.232045 | 0.0482636 | -4.807864 | 0.0000035 |
NULL
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 32.0901888 | 12.0208877 | 2.669536 | 0.0083841 |
| gdp_per_cap | 0.0016496 | 0.0005256 | 3.138757 | 0.0020228 |
| diseases | -0.1379609 | 0.0566282 | -2.436258 | 0.0159442 |
| gdp_per_cap:diseases | -0.0000078 | 0.0000026 | -2.993542 | 0.0031985 |
NULL
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 51.6593847 | 10.3880702 | 4.9729530 | 0.0000017 |
| gdp_per_cap | 0.0000796 | 0.0000354 | 2.2474051 | 0.0259898 |
| diseases | -0.2324329 | 0.0495206 | -4.6936607 | 0.0000058 |
| daly_all | 0.0000013 | 0.0000339 | 0.0372729 | 0.9703141 |
NULL
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 50.6938341 | 10.5706620 | 4.7957104 | 0.0000037 |
| gdp_per_cap | 0.0001089 | 0.0000725 | 1.5004474 | 0.1354817 |
| diseases | -0.2277647 | 0.0492832 | -4.6215442 | 0.0000078 |
| health_exp_tot_usd | -0.0003473 | 0.0007606 | -0.4566105 | 0.6485742 |
NULL
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 32.0901888 | 12.0208877 | 2.669536 | 0.0083841 |
| gdp_per_cap | 0.0016496 | 0.0005256 | 3.138757 | 0.0020228 |
| diseases | -0.1379609 | 0.0566282 | -2.436258 | 0.0159442 |
| gdp_per_cap:diseases | -0.0000078 | 0.0000026 | -2.993542 | 0.0031985 |
[[1]] NULL
[[2]] NULL
[[3]] NULL
[[4]] NULL
[[5]] NULL
[[6]] NULL
[[7]] NULL
[[8]] NULL
[[9]] NULL
#'
I should ask Andrew about whether to use weights = systems. I think conceptually it is the right thing to do.